{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authors: Andreas Haupt, Alexander Quispe, Anzony Quispe "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simulation Design"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install hdmpy\n",
    "import random\n",
    "import hdmpy\n",
    "import numpy as np\n",
    "import statsmodels.api as sm\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import colors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set seed\n",
    "np.random.seed(0)\n",
    "B = 100\n",
    "Naive = np.zeros( B )\n",
    "Orthogonal = np.zeros( B )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range( 0, B ):\n",
    "    n = 100\n",
    "    p = 100\n",
    "    beta = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )\n",
    "    gamma = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )\n",
    "\n",
    "    mean = 0\n",
    "    sd = 1\n",
    "    X = np.random.normal( mean , sd, n * p ).reshape( n, p )\n",
    "\n",
    "    D = ( X @ gamma ) + np.random.normal( mean , sd, n ).reshape( n, 1 )/4 # We reshape because in r when we sum a vecto with a matrix it sum by column\n",
    "    \n",
    "    # DGP \n",
    "    Y = D + ( X @ beta ) + np.random.normal( mean , sd, n ).reshape( n, 1 )\n",
    "\n",
    "    # single selection method\n",
    "    r_lasso_estimation = hdmpy.rlasso( np.concatenate( ( D , X ) , axis  =  1 ) , Y , post = True ) # Regress main equation by lasso\n",
    "\n",
    "    coef_array = r_lasso_estimation.est[ 'coefficients' ].iloc[ 2:, :].to_numpy()    # Get \"X\" coefficients \n",
    "\n",
    "    SX_IDs = np.where( coef_array != 0 )[0]\n",
    "\n",
    "    # In case all X coefficients are zero, then regress Y on D\n",
    "    if sum(SX_IDs) == 0 : \n",
    "        Naive[ i ] = sm.OLS( Y , sm.add_constant(D) ).fit().summary2().tables[1].round(3).iloc[ 1, 0 ] \n",
    "\n",
    "    # Otherwise, then regress Y on X and D (but only in the selected coefficients)\n",
    "    elif sum( SX_IDs ) > 0 :\n",
    "        X_D = np.concatenate( ( D, X[:, SX_IDs ] ) , axis = 1 )\n",
    "        Naive[ i ] = sm.OLS( Y , sm.add_constant( X_D ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]\n",
    "\n",
    "    # In both cases we save D coefficient\n",
    "        \n",
    "    # Regress residuals. \n",
    "    resY = hdmpy.rlasso( X , Y , post = False ).est[ 'residuals' ]\n",
    "    resD = hdmpy.rlasso( X , D , post = False ).est[ 'residuals' ]\n",
    "    Orthogonal[ i ] = sm.OLS( resY , sm.add_constant( resD ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Orto_breaks = [-1.2, -1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2]\n",
    "Naive_breaks = [-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1, 1.2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(1, 2, sharex= True, tight_layout=True)\n",
    "\n",
    "# We can set the number of bins with the `bins` kwarg\n",
    "axs[0].hist( Orthogonal - 1 , range = (-2, 2), density = True , bins = Orto_breaks )\n",
    "axs[1].hist( Naive - 1, range = (-2, 2), density = True , bins = Naive_breaks )\n",
    "\n",
    "axs[0].title.set_text('Orthogonal')\n",
    "axs[1].title.set_text('Naive')\n",
    "\n",
    "axs[0].set_xlabel( 'Orhtogonal - True' )\n",
    "axs[1].set_xlabel( 'Naive - True' )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set seed\n",
    "np.random.seed(0)\n",
    "\n",
    "for i in range(0, B):\n",
    "    n = 100\n",
    "    p = 100\n",
    "    beta = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )\n",
    "    gamma = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )\n",
    "\n",
    "    mean = 0\n",
    "    sd = 1\n",
    "    X = np.random.normal( mean , sd, n * p ).reshape( n, p )\n",
    "\n",
    "    D = ( X @ gamma ) + np.random.normal( mean , sd, n ).reshape( n, 1 )/4 # We reshape because in r when we sum a vecto with a matrix it sum by column\n",
    "    Y = D + ( X @ beta ) + np.random.normal( mean , sd, n ).reshape( n, 1 )\n",
    "\n",
    "    # single selectin method\n",
    "    r_lasso_estimation = hdmpy.rlasso( np.concatenate( ( D , X ) , axis  =  1 ) , Y , post = True )\n",
    "\n",
    "    coef_array = r_lasso_estimation.est[ 'coefficients' ].iloc[ 2:, :].to_numpy()\n",
    "\n",
    "    SX_IDs = np.where( coef_array != 0 )[0]\n",
    "\n",
    "    if sum(SX_IDs) == 0 : \n",
    "        Naive[ 0 ] = sm.OLS( Y , sm.add_constant(D) ).fit().summary2().tables[1].round(3).iloc[ 1, 0 ]\n",
    "\n",
    "    elif sum( SX_IDs ) > 0 :\n",
    "        X_D = np.concatenate( ( D, X[:, SX_IDs ] ) , axis = 1 )\n",
    "        Naive[ i ] = sm.OLS( Y , sm.add_constant( X_D ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]\n",
    "\n",
    "\n",
    "    resY = hdmpy.rlasso( X , Y , post = True ).est[ 'residuals' ]\n",
    "    resD = hdmpy.rlasso( X , D , post = True ).est[ 'residuals' ]\n",
    "    Orthogonal[ i ] = sm.OLS( resY , sm.add_constant( resD ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(1, 2, sharex= True, tight_layout=True)\n",
    "\n",
    "# We can set the number of bins with the `bins` kwarg\n",
    "axs[0].hist( Orthogonal - 1 , range = (-2, 2), density = True , bins = Orto_breaks )\n",
    "axs[1].hist( Naive - 1, range = (-2, 2), density = True , bins = Naive_breaks )\n",
    "\n",
    "axs[0].title.set_text('Orthogonal')\n",
    "axs[1].title.set_text('Naive')\n",
    "\n",
    "axs[0].set_xlabel( 'Orhtogonal - True' )\n",
    "axs[1].set_xlabel( 'Naive - True' )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
